*************************************************************************
* Data
* - Long panel from Schlenker + BE (2016): 1950-2005
*
*************************************************************************

global path_dta_ori "Data_ori"

*************************************************************************
* Data
*************************************************************************

********************************
* Schklenker data
********************************

* Balanced panel 1950-2019, 3,037 counties (after putting it together)
* (3,105 counties, but only 3,037 with cropArea>0 which is used for weighting across stations)
use "$path_dta_ori\Schlenker\Weather_data_1950_2019.dta", clear
tab year

********************************
* Burke and Emerick (2016) data
********************************

* Unbalanced panel 1950-2010, 3,141 counties
use "$path_dta_ori\Burke and Emerick (2016)\us_panel.dta", clear
tab year
codebook fips

* Note that the number of counties decreases substantially in 2006. 2006 has 58% of counties in 2005
count if year==2005
scalar N05=r(N)
count if year==2006
scalar N06=r(N)
di N06/N05

********************************
* Merge
********************************

use "$path_dta_ori\Schlenker\Weather_data_1950_2019.dta", clear
merge 1:1 fips year using "$path_dta_ori\Burke and Emerick (2016)\us_panel.dta", update keepusing(corn* longitude latitude)
tab year _merge

* Keep 1950-2005
keep if year<=2005
tab year _merge

* Keep common counties, 2,819 counties
keep if _merge==3
codebook fips

* Keep East 100th meridian: 2,455 counties
count
sum longitude
keep if longitude>=-100
codebook fips
tab year

* 2,455 counties, 137,480 obs total, but 107,290 with valid area and yield (78%)
* All observations with weather variables
tab year
sum fips corn*
count if corn_area<. & cornyield<.
scalar Nv=r(N)
count
scalar N=r(N)
di Nv/N

* Drop extra variables
drop latitude _merge

* Count counties with balanced data
gen aux=(corn_area==. | cornyield==.)
bysort fips: egen auxc=max(aux)
tab aux auxc
tab year auxc
drop aux auxc

* State varialbe and number of counties per year
gen state=floor(fips/1000)
bysort state year: gen nfips=_N

* Drop observations without yields or area
drop if corn_area>=. | cornyield >=.

* Count number of observations per county
bysort fips: egen nf=count(fips)

* Drop counties in with less than 10 observations
drop if nf<10

* Save
order state fips year longitude nfips corn* dday* prec 
label variable state "State"
label variable nfips "Number of counties per the state-year"
label variable nf "Number of years per county"
label variable prec "Precipitation (mm)"
desc
save "Data_new\Data_long.dta", replace

*************************************************************************
* Descriptive table
*************************************************************************

* Long panel
use "Data_new\Data_long.dta", clear

* Temperature dd0C and precipitation in average daily terms
gen precr = prec/183
gen ddr = dday0C/183
gen areaK = corn_area/1000
sum precr ddr

label variable year "Year"
label variable ddr  "Temperature (C)"
label variable precr "Precipitation (mm)"
label variable areaK "Corn area (1,000s acres)"
label variable cornyield "Corn yields (bu/acre)"

global var_desc ddr precr cornyield areaK 
eststo desc1: estpost tabstat $var_desc, c(stat) stat(n p10 p25 p50 mean p75 p90 sd) 
esttab desc1 using "Results/Data_desc_long.tex", replace f ///
	cells("count(fmt(%12.0fc)) p10(fmt(%12.1fc)) p25(fmt(%12.1fc)) p50(fmt(%12.1fc)) mean(fmt(%12.1fc)) p75(fmt(%12.1fc)) p90(fmt(%12.1fc)) sd(fmt(%12.1fc))") ///
	nostar noobs label nonumber collabels("Obs" "P10" "P25" "Median" "Mean" "P75" "P90" "St. Dev.") ///
	nomtitle nonote unstack

